%%%%%%%%%%%%%% Estimates full model, including robustness
%%%%%%%%%%%%%% based on rev4_factor_estimation_data_4step_types
clc
clear
clear global
cd ''

global f_ijs ind_id draw_n per_n ind_n c_char_n b_char_n m_char_n cont_char_n  thresh measures factor_n X_n X_s X_ind_s theta_xy lottery_n time_choice_n choices r_xy_table lottery_choice_ind time_choice_ind step0_coeffs step2_start  scores  n_type sim_theta_i sim_kappa_i est_p_type f_n sim_factor_contribs sim_1j_contribs maxima minima theta_i_cat_thresh r_scale sig_r_scale sig_th_scale  kappa_dim a1_exp b1_exp p_a1_exp a2_exp b2_exp p_a2_exp sig_th_lower_limit  CRRA2 est_kappa_vector  K_time x1_exp t1_exp x2_exp t2_exp  sig_r_sep        CARA hyper  x1 t1 x2 t2   analytical a1 b1 p_a1 a2 b2 p_a2

%%%%%%%%%%%%%%%%%%%%%%%% Factor Data Import %%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%% Import "factor_data_base.out" as a "Numeric Matrix", comma-delimited.
%%% Range A2:BA1225
%factordata=factordatabase;

%%% For robustness on factor proxies based on validation follow-up study,
%%% follow same procedure and import "factor_data_R1.out" 
% factordata=factordataR1;

% number individuals
ind_n=size(factordata,1);
% number factor draws in simulation
draw_n=200
%%% # factors
factor_n=4
%%% # of Xs influencing the factor (sex, language, and 3 age cats with the
%%% youngest omitted)
X_n=5
%%%% # of unobserved types
n_type=5;

% defalt is CRRA. Can switch below to CARA or Expo-power utility (the
% latter can only be used for risk estimation, not time estimation). CRRA2
% is an alternative formulation of CRRA which keeps utility positive. Used
% as robustness check in time estimation
CARA=0
ep=0
CRRA2=0

%%% final_factors=0: base specification
%%% final_factors=1: robustness using proxy measures from validation study
final_factors=0

if final_factors==0
    c_char_n=[10; 8; 10; 10];
    b_char_n=[4; 0; 6; 5];
    m_char_n=[6; 7; 4; 5];

    cont_char_n=[0; 1; 0; 0];
    thresh=[3 3 4 2];
    %%% # of error draws per individual
    draw_n=200;
end

if final_factors==1
    c_char_n=[10; 8; 10; 10];
    b_char_n=[7; 0; 7; 5];
    m_char_n=[3; 7; 3; 5];

    cont_char_n=[0; 1; 0; 0];
    thresh=[2 3 4 2];
    %%% # of error draws per individual
    draw_n=200;
end


load_size=max(c_char_n);
size_load=10;

% upper limit on discount rate
r_scale=1;
% upper limit on discount rate sd
sig_r_scale=1; 
% upper limit on risk aversion sd
sig_th_scale=1;
% lower limit on risk aversion sd
sig_th_lower_limit=.001;
%%% CARA coefficient of risk aversion has an approximately 1/20 scale
%%% relative to CRRA 
if CARA==1
    sig_th_scale=0.05;
end

%%% Additional estimation and optimization parameters
err_type_id=1
error_mag_id=1;
per_n=1;
char_n=1;
sig_r_sep=1;
iter_f=30;
iter_n=30;
iter_t=30;
iter_c=1;
maxima=[Inf;Inf;Inf;Inf;Inf];
minima=[-Inf;0;0;0;0];
theta_i_cat_thresh=[171;300];
analytical=1




%%%%%%%%%%%%%%%%%%%%%%%% Choice Data Import %%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%% Lotteries %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%% Choice Data %%%%%%%%%%%%%%
%%% Import "lottery_choice.out" as a "Numeric Matrix", comma-delimited.
%%% Range A2:BC1225

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Thresholds %%%%%%%%%%%%%

% in indifference_threshold_calculation, can switch to whether CRRA, CARA,
% or Expo-Power Utility is used.
run indifference_threshold_calculation
theta_xy=omega_th_final1';
lottery_n=size(theta_xy,2);

% lottery one is the safer one
a1=lotteries(:,1)';
b1=lotteries(:,2)';
p_a1=lotteries(:,3)';
% lottery 2 is the riskier one
a2=lotteries(:,4)';
b2=lotteries(:,5)';
p_a2=lotteries(:,6)';

a1_exp=kron(ones(ind_n*draw_n,1), a1);
b1_exp=kron(ones(ind_n*draw_n,1), b1);
p_a1_exp=kron(ones(ind_n*draw_n,1), p_a1);
a2_exp=kron(ones(ind_n*draw_n,1), a2);
b2_exp=kron(ones(ind_n*draw_n,1), b2);
p_a2_exp=kron(ones(ind_n*draw_n,1), p_a2);




%%%%%%%%%%%%%%%%%%%%%%%%%% Time Choices %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%% Temporal Choices %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%% Choice Data %%%%%%%%%%%%%%
%%% Import "temporal_choice.out" as a "Numeric Matrix", comma-delimited.
%%% Range A2:AV1225

load temporal_task_info
time_choice_n=size(choices,1);

% choice 1 is the sooner one
% $ in first option
x1=choices(:,1)';
% years from now that the $ will be received
t1=choices(:,2)';
% choice 2 is the more distant one
% $ in second option
x2=choices(:,3)';
% years from now that the $ will be received
t2=choices(:,4)';

x1_exp=kron(ones(ind_n*draw_n,1), x1);
t1_exp=kron(ones(ind_n*draw_n,1), t1);
x2_exp=kron(ones(ind_n*draw_n,1), x2);
t2_exp=kron(ones(ind_n*draw_n,1), t2);

kappa_dim=0;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Necessary variable generation %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%% triggers whether set seed is used
rand_set=1
%%% seed for random draw
seed=1;

if rand_set==1
    rng(seed);
end;

%%% creates an index for the individuals, given the number of simulated
%%% draws   
ind_id = kron(ones(per_n,1), kron([1:ind_n]', ones(draw_n,1))); 

%%% Numbers the simulated factor draws for each individual
draw_id = kron(ones(ind_n*per_n,1), [1:draw_n]');

%%% Matrix of individual characteristics (each column is a variable). 
X_s=factordata(:,1:X_n);
%%% expands the characteristics vector such that is appears next to
%%% all the simulated draws for each individual
X_ind_s=kron(X_s, ones(per_n*draw_n,1));

%%% expand lottery choice matrix to fit simulated draws
lottery_choice_ind=kron(lotterychoice, ones(per_n*draw_n,1));
time_choice_ind=kron(temporalchoice, ones(per_n*draw_n,1));

% Matrix of measure data for all the factors combined (their sizes are
% given by how many of each type of measure (binary, multi-valued,
% continuous) are in each factor 
X_f=factordata(:,X_n+1:X_n+sum(c_char_n));
%%% expands the measures vector such that is appears next to
%%% all the simulated draws for each individual
measures=kron(X_f, ones(per_n*draw_n,1));

%%% Matrix of simulated individual factor draws, one column for each factor
% individual-level factor draws for the integral approximation
if err_type_id==1
    f_ijs=error_mag_id*randn(ind_n*draw_n*per_n,factor_n);
end




%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ESTIMATION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


rng('shuffle')

%%% size of the nuisance parameter vector (threshold-mean) in facctor model
size_const=sum(b_char_n)+thresh*m_char_n+sum(cont_char_n);
%%% size of the loading vector for all factors combined
size_load=sum(c_char_n-1);
%%% number of coefficients to be estimated in factor model
size_factors=size_const+size_load+factor_n+X_n*factor_n+sum(cont_char_n);
%%% number of risk coefficients to be estimated 
size_risk=3*(factor_n+2+n_type-1)+n_type-1+kappa_dim;
n_theta_para=3
%%% number of time coefficients to be estimated 
size_time=(1+sig_r_sep)*(factor_n+2+n_type-1);





%%%%%%%%%%%%%%%%%%%%%% STEP 0 - Factors  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
options = optimset('Algorithm','sqp','Display','iter-detailed','TolFun',1e-5, 'TolX', 1e-5, 'MaxFunEvals', 100000, 'MaxIter', 150);

%%%%%%%%%%%%%%%%%%%%%%%%%%%% 1st Factor
%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
f_n=1;
size_const=b_char_n(f_n)+thresh(f_n)*m_char_n(f_n)+cont_char_n(f_n);
size_load=c_char_n(f_n)-1;
theta_start_factors=[randn(size_const+size_load+1+X_n+cont_char_n(f_n),1)];

%%%%%%%%%%%%%%%%%%%%%%%% Multi Factor Probit taking all types of vars
result_table_all0=NaN(size(theta_start_factors,1),iter_f);
fval_table0=NaN(1,iter_f);
nan_table0=NaN(1,iter_f);
i=1;
while i<=iter_f
                try
                    rng('shuffle')
                                
                    theta_start_factors=[randn(size_const+size_load+1+X_n+cont_char_n(f_n),1)];
                    [B_ML_probit_all_table, fval] = fminunc(@ml_separate_factors,theta_start_factors, options);           
                    catch ME 
                    fprintf('!!!had a NaN problem with actual starting values during optimisation!!!', ME.message);
                    iter_n
                    continue;  
                end;
      result_table_all0(:,i)=B_ML_probit_all_table;
      fval_table0(1,i)=fval 
      [logL, nan_pct]=ml_separate_factors(B_ML_probit_all_table);
      nan_table0(1,i)=nan_pct        
      i=i+1
      savefile = 'coeffs_step0_1.mat';
      save(savefile, 'result_table_all0', 'fval_table0', 'thresh', 'factor_n', 'c_char_n', 'b_char_n', 'X_n', 'm_char_n', 'cont_char_n','iter_n', 'nan_table0', 'f_n');
end;

fval_table0(fval_table0==0)=NaN;
[M,I]=min(fval_table0);
col=I;
step0_coeffs=result_table_all0(:,col);
factor1_coeffs=step0_coeffs;

save(savefile, 'result_table_all0', 'fval_table0', 'thresh', 'factor_n', 'c_char_n', 'b_char_n', 'X_n', 'm_char_n', 'cont_char_n','iter_n', 'nan_table0', 'f_n', 'factor1_coeffs');


%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2nd Factor
%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
f_n=2;
size_const=b_char_n(f_n)+thresh(f_n)*m_char_n(f_n)+cont_char_n(f_n);
size_load=c_char_n(f_n)-1;
theta_start_factors=[randn(size_const+size_load+1+X_n+cont_char_n(f_n),1)];

%%%%%%%%%%%%%%%%%%%%%%%% Multi Factor Probit taking all types of vars
result_table_all0=NaN(size(theta_start_factors,1),iter_f);
fval_table0=NaN(1,iter_f);
nan_table0=NaN(1,iter_f);
i=1;
while i<=iter_f
                try
                    rng('shuffle')
                               
                    theta_start_factors=[randn(size_const+size_load+1+X_n+cont_char_n(f_n),1)];
                    [B_ML_probit_all_table, fval] = fminunc(@ml_separate_factors,theta_start_factors, options);           
                    catch ME 
                    fprintf('!!!had a NaN problem with actual starting values during optimisation!!!', ME.message);
                    iter_n
                    continue; 
                end;
      result_table_all0(:,i)=B_ML_probit_all_table;
      fval_table0(1,i)=fval 
      [logL, nan_pct]=ml_separate_factors(B_ML_probit_all_table);
      nan_table0(1,i)=nan_pct        
      i=i+1
        savefile = 'coeffs_step0_2.mat';
         save(savefile, 'result_table_all0', 'fval_table0', 'thresh', 'factor_n', 'c_char_n', 'b_char_n', 'X_n', 'm_char_n', 'cont_char_n','iter_n', 'nan_table0', 'f_n');
end;

fval_table0(fval_table0==0)=NaN;
[M,I]=min(fval_table0);
col=I;
step0_coeffs=result_table_all0(:,col);
factor2_coeffs=step0_coeffs;


save(savefile, 'result_table_all0', 'fval_table0', 'thresh', 'factor_n', 'c_char_n', 'b_char_n', 'X_n', 'm_char_n', 'cont_char_n','iter_n', 'nan_table0', 'f_n', 'factor2_coeffs');

% %%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3rd Factor
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
f_n=3;
size_const=b_char_n(f_n)+thresh(f_n)*m_char_n(f_n)+cont_char_n(f_n);
size_load=c_char_n(f_n)-1;
theta_start_factors=[randn(size_const+size_load+1+X_n+cont_char_n(f_n),1)];

result_table_all0=NaN(size(theta_start_factors,1),iter_f);
fval_table0=NaN(1,iter_f);
nan_table0=NaN(1,iter_f);
i=1;
while i<=iter_f
                try
                    rng('shuffle')
                                
                    theta_start_factors=[randn(size_const+size_load+1+X_n+cont_char_n(f_n),1)];
                    [B_ML_probit_all_table, fval] = fminunc(@ml_separate_factors,theta_start_factors, options);           
                    catch ME 
                    fprintf('!!!had a NaN problem with actual starting values during optimisation!!!', ME.message);
                    iter_n
                    continue;  
                end;
      result_table_all0(:,i)=B_ML_probit_all_table;
      fval_table0(1,i)=fval 
      [logL, nan_pct]=ml_separate_factors(B_ML_probit_all_table);
      nan_table0(1,i)=nan_pct        
      i=i+1
        savefile = 'coeffs_step0_3.mat';
         save(savefile, 'result_table_all0', 'fval_table0', 'thresh', 'factor_n', 'c_char_n', 'b_char_n', 'X_n', 'm_char_n', 'cont_char_n','iter_n', 'nan_table0', 'f_n');
end;

fval_table0(fval_table0==0)=NaN;
[M,I]=min(fval_table0);
col=I;
step0_coeffs=result_table_all0(:,col);
factor3_coeffs=step0_coeffs;


save(savefile, 'result_table_all0', 'fval_table0', 'thresh', 'factor_n', 'c_char_n', 'b_char_n', 'X_n', 'm_char_n', 'cont_char_n','iter_n', 'nan_table0', 'f_n', 'factor3_coeffs');


%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4th Factor
%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
f_n=4;
size_const=b_char_n(f_n)+thresh(f_n)*m_char_n(f_n)+cont_char_n(f_n);
size_load=c_char_n(f_n)-1;
theta_start_factors=[randn(size_const+size_load+1+X_n+cont_char_n(f_n),1)];

result_table_all0=NaN(size(theta_start_factors,1),iter_f);
fval_table0=NaN(1,iter_f);
nan_table0=NaN(1,iter_f);
i=1;
while i<=iter_f
                try
                    rng('shuffle')
                                
                    theta_start_factors=[randn(size_const+size_load+1+X_n+cont_char_n(f_n),1)];
                    [B_ML_probit_all_table, fval] = fminunc(@ml_separate_factors,theta_start_factors, options);           
                    catch ME 
                    fprintf('!!!had a NaN problem with actual starting values during optimisation!!!', ME.message);
                    iter_n
                    continue; 
                end;
      result_table_all0(:,i)=B_ML_probit_all_table;
      fval_table0(1,i)=fval 
      [logL, nan_pct]=ml_separate_factors(B_ML_probit_all_table);
      nan_table0(1,i)=nan_pct        
      i=i+1
        savefile = 'coeffs_step0_4.mat';
         save(savefile, 'result_table_all0', 'fval_table0', 'thresh', 'factor_n', 'c_char_n', 'b_char_n', 'X_n', 'm_char_n', 'cont_char_n','iter_n', 'nan_table0', 'f_n');
end;

fval_table0(fval_table0==0)=NaN;
[M,I]=min(fval_table0);
col=I;
step0_coeffs=result_table_all0(:,col);
factor4_coeffs=step0_coeffs;


save(savefile, 'result_table_all0', 'fval_table0', 'thresh', 'factor_n', 'c_char_n', 'b_char_n', 'X_n', 'm_char_n', 'cont_char_n','iter_n', 'nan_table0', 'f_n', 'factor4_coeffs');



%%%% Combine together 
step0_coeffs=[factor1_coeffs; factor2_coeffs; factor3_coeffs; factor4_coeffs];


theta=step0_coeffs;
%%% shows estimated parameter values in categories. 
run comparator_general_data_sig_types
%%% gets simulated factors for use in next step of estimation
[logL, nan_pct, scores, sim_factor_contribs]=ml_factors(step0_coeffs);


% % %%%%%%%%%%%%%%%%%%%%%%%%%% Male and female mods 
% % Only use (uncomment) below if want to estimate model separately on the
% % male/female sample respectively
% women_ind=find(X_ind_s(:,1)==1);
% men_ind=find(X_ind_s(:,1)==0);
% 
% 
% %%%%%%%%%%%%%%%%%% Men %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% %%% for men, sex=0, so the first estimated coefficient on structural
% parameter heterogeneity (the effect of sex) is meaningless in the male
% only sample and should be ignored
% ind_n=567
% scores=scores(men_ind,:);
% sim_factor_contribs=sim_factor_contribs(men_ind,:);
% X_ind_s=X_ind_s(men_ind,:);
% lottery_choice_ind=lottery_choice_ind(men_ind,:);
% time_choice_ind=time_choice_ind(men_ind,:);
% f_ijs=f_ijs(men_ind,:);
% ind_id=ind_id(men_ind,:);
%  measures=measures(men_ind,:);
% 
% % % %%%%%%%%%%%%%%%%%% Women %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ind_n=657
% scores=scores(women_ind,:);
% sim_factor_contribs=sim_factor_contribs(women_ind,:);
% X_ind_s=X_ind_s(women_ind,:);
% lottery_choice_ind=lottery_choice_ind(women_ind,:);
% time_choice_ind=time_choice_ind(women_ind,:);
% f_ijs=f_ijs(women_ind,:);
% ind_id=ind_id(women_ind,:);
%  measures=measures(women_ind,:);
%  
% % %%%%%%%%%%%%%%%%%%%%%%%%%%%%% End of Male/Female Data Mods Section




options = optimoptions('fminunc','FiniteDifferenceType','central', 'Algorithm','Quasi-Newton','Display','iter-detailed','TolFun',1e-5, 'TolX', 1e-7, 'HessUpdate', 'dfp', 'FiniteDifferenceStepSize',.1, 'MaxFunctionEvaluations', 100000, 'MaxIterations', 200);



%%%%%%%%%%%%%%%%%%%%%% STEP 1 - Risk only %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

theta_start_risk_only=[randn(size_risk,1)];

result_table_all1=NaN(size(theta_start_risk_only,1),iter_n);
fval_table1=NaN(1,iter_n);
exitflag_table1=NaN(1,iter_n);
nan_table1=NaN(1,iter_n);
i=1;
while i<=iter_n
                try
                    rng('shuffle')
                    
                    %%% Starting value
                    %%% generation to ensure that at start the generated
                    %%% type probas sum to <1 (otherwise get NaN). Take the log
                    %%% as the actual proba is exp() of the coeff.
                    
                    theta_start_risk_only=[randn(n_theta_para*(factor_n+1)+n_theta_para+kappa_dim,1)];
                    for ss=1:(n_type-1)
                        theta_start_risk_only=[theta_start_risk_only; log(1/n_type); randn(n_theta_para,1)];
                    end
                    

                    
                    
                    [B_ML_probit_all_table, fval, exitflag] = fminunc(@ml_risk_score_types_cdf, theta_start_risk_only, options);                                 
                    catch ME 
                    disp('Error Message:')
                    disp(ME.message)
                    iter_n
                     continue;  
                end;
      result_table_all1(:,i)=B_ML_probit_all_table;
      fval_table1(1,i)=fval 
      exitflag_table1(1,i)=exitflag
      [logL, nan_pct]=ml_risk_score_types_cdf(B_ML_probit_all_table);
      nan_table1(1,i)=nan_pct        
      i=i+1
        savefile = 'coeffs_step1.mat';
        save(savefile, 'result_table_all1', 'fval_table1', 'thresh', 'factor_n', 'c_char_n', 'b_char_n', 'X_n', 'm_char_n', 'cont_char_n','iter_n', 'nan_table1', 'step0_coeffs', 'kappa_dim', 'exitflag_table1');


end;

%%% Gets lowest fval over the optimizations with random starting values
fval_table1(fval_table1==0)=NaN;
[M,I]=min(fval_table1)
col=I;
step1_coeffs=result_table_all1(:,col);

%%% shows estimated parameter values in categories
theta=step1_coeffs;
run comparator_general_data_sig_types

options = optimoptions('fminunc','FiniteDifferenceType','central', 'Algorithm','Quasi-Newton','Display','iter-detailed','TolFun',1e-5, 'TolX', 1e-7, 'HessUpdate', 'dfp', 'FiniteDifferenceStepSize',.1, 'MaxFunctionEvaluations', 100000, 'MaxIterations', 150);

%%%%%%%%%%%%%%%%%%%%%% STEP 1j - joint estimation of risk and factors coefficient with starting values previously obtained separately for numerical purposes %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

step1j_start=[step0_coeffs; step1_coeffs];

result_table_all1j=NaN(size(step1j_start,1),iter_n);
fval_table1j=NaN(1,iter_n);
nan_table1j=NaN(1,iter_n);
i=1;
%%% Only one iter, usually joint is pretty close to that obtained
%%% separately
while i<=1
                try
                    [B_ML_probit_all_table, fval] = fminunc(@ml_risk_factors_score_types_cdf,step1j_start, options);                                 
                    catch ME 
                    disp('Error Message:')
                    disp(ME.message)
                    continue; 
                end;
      result_table_all1j(:,i)=B_ML_probit_all_table;
      fval_table1j(1,i)=fval 
      %%% outputs simulated factors and risk aversion given factors for use in
      %%% the next estimation step
      [logL, nan_pct, scores, sim_factor_contribs, sim_theta_i, sim_kappa_i, est_p_type, sim_1j_contribs]=ml_risk_factors_score_types_cdf(B_ML_probit_all_table);
      nan_table1j(1,i)=nan_pct        
      i=i+1
        savefile = 'coeffs_step1j.mat';
        save(savefile, 'result_table_all1j', 'fval_table1j', 'thresh', 'factor_n', 'c_char_n', 'b_char_n', 'X_n', 'm_char_n', 'cont_char_n','iter_n', 'nan_table1j', 'B_ML_probit_all_table', 'sim_factor_contribs', 'f_ijs', 'scores', 'sim_theta_i', 'sim_kappa_i', 'est_p_type', 'sim_1j_contribs');
end;

step1j_coeffs=result_table_all1j(:,1);

save data_step2

%%% inputs from below optimization will be later used to calculate
%%% clustered robust standard errors
[B_ML_probit_all_table, fval, exitflag,output,grad_risk1j,hess_risk1j] = fminunc(@ml_risk_score_types_cdf, step1j_coeffs(size_factors+1:end), options);

step2_start=[step1j_coeffs];
%%% shows estimated parameter values in categories
theta=step2_start;
run comparator_general_data_sig_types


% %%% for heterogeneity contribution calculation for simulation
% %%% with only risk preferences (expo-power utility), save data
% %%% below and stop here
% savefile = 'coeff_export_for_simulation.mat';
% save(savefile,'thresh', 'factor_n', 'c_char_n', 'b_char_n', 'X_n', 'm_char_n', 'cont_char_n', 'n_type', 'ind_n', 'est_alpha', 'X_s', 'X_ind_s', 'f_ijs', 'est_load', 'est_sd', 'est_f_mean', 'est_sd_cont', 'est_thetas', 'est_kappas', 'est_sig_ths', 'est_p_types', 'est_types', 'sig_th_scale', 'theta_xy')

%%% below serves for robustness allowing a different intercept on mistakes in time
%%% tasks
est_kappa_vector=est_prefs_risk(:,3);


%%%%%%%%%%%%%%%%%%%%%% STEP 2: Time only %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%% Below are variables which can be turned on to perform various
%%% robustness checks listed in the online Appendix. Values are set by
%%% default such that the base specification from the body of the article
%%% is esimated



%%% toggle for allowing to have a separate intercept for Kappa on time
%%% tasks (adds one extra coefficient to estimate)
K_time=0

% upper limit on discount rates, set to 2 for robustness with 200% upper
% limit
r_scale=1

%%% sets discount rate calculation to hyperbolic (rather than exponential)
hyper=0

options = optimoptions('fminunc','FiniteDifferenceType','central', 'Algorithm','Quasi-Newton','Display','iter-detailed','TolFun',1e-5, 'TolX', 1e-7, 'HessUpdate', 'dfp', 'FiniteDifferenceStepSize',.1, 'MaxFunctionEvaluations', 100000, 'MaxIterations', 150);

size_time=(1+sig_r_sep)*(factor_n+2+n_type-1);
theta_start_time=[randn(size_time+K_time,1)];

result_table_all2=NaN(size(theta_start_time+K_time,1),iter_t);
fval_table2=NaN(1,iter_t);
nan_table2=NaN(1,iter_t);
i=1;
while i<=iter_t
                try

                    rng('shuffle')
                    theta_start_time=[randn(size_time+K_time,1)];
                    [B_ML_probit_all_table, fval] = fminunc(@ml_time_score_types_cdf,theta_start_time, options);                               
                    catch ME 
                    disp('Error Message:')
                    disp(ME.message)
                    iter_n
                    continue;  
                end;
      result_table_all2(:,i)=B_ML_probit_all_table;
      fval_table2(1,i)=fval 
      [logL, nan_pct]=ml_time_score_types_cdf(B_ML_probit_all_table);
      nan_table2(1,i)=nan_pct        
      i=i+1
        savefile = 'coeffs_step2.mat';
        save(savefile, 'result_table_all2', 'fval_table2', 'iter_n', 'thresh', 'factor_n', 'c_char_n', 'b_char_n', 'X_n', 'm_char_n', 'cont_char_n', 'nan_table2', 'step2_start', 'CRRA2', 'K_time', 'sig_r_sep');
end; 

%%% Gets lowest fval over the optimizations with random starting values
fval_table2(fval_table2==0)=NaN;
[M,I]=min(fval_table2)
col=I;
step2_coeffs=result_table_all2(:,col);


%%%%%%%%%%%%%%%%%%%%%% STEP 2j - joint estimation of risk and time coefficients with starting values previously obtained separately for numerical purposes %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%% Obtaining starting values for estimation:
factor_1j=step2_start(1:size(step0_coeffs,1));
risk_1j=step2_start(size(step0_coeffs,1)+1:end);
theta1=risk_1j;
theta2=step2_coeffs;
step2j_start=[theta1(1:n_theta_para*(factor_n+1)); theta2(1:(1+sig_r_sep)*(factor_n+1))];
theta1(1:n_theta_para*(factor_n+1))=[];
theta2(1:(1+sig_r_sep)*(factor_n+1))=[];
for i=1:(n_type-1)
    step2j_start=[step2j_start; theta1(1:n_theta_para); theta2(1:(1+sig_r_sep)); theta1(n_theta_para+1)];
    theta1(1:n_theta_para+1)=[]; 
    theta2(1:(1+sig_r_sep))=[];
end
step2j_start=[step2j_start; theta1(1:n_theta_para); theta2(1:(1+sig_r_sep))];
theta1(1:n_theta_para)=[]; 
theta2(1:(1+sig_r_sep))=[];
if K_time==1
    step2j_start=[step2j_start; theta2(end)];
    theta2(end)=[];
end

%optimization
options = optimoptions('fminunc','FiniteDifferenceType','central', 'Algorithm','Quasi-Newton','Display','iter-detailed','TolFun',1e-5, 'TolX', 1e-7, 'HessUpdate', 'dfp', 'FiniteDifferenceStepSize',.1, 'MaxFunctionEvaluations', 100000, 'MaxIterations', 200); 

[B_ML_probit_all_table, fval] = fminunc(@ml_risk_time_score_types_cdf,step2j_start, options);    
step2j_coeffs=B_ML_probit_all_table;

savefile = 'coeffs_step2j.mat.mat';
save(savefile, 'B_ML_probit_all_table', 'fval', 'iter_n', 'thresh', 'factor_n', 'c_char_n', 'b_char_n', 'X_n', 'm_char_n', 'cont_char_n', 'step2j_start', 'factor_1j', 'step2j_coeffs', 'r_scale', 'analytical', 'n_theta_para', 'CRRA2', 'K_time', 'sig_r_sep', 'hyper', 'CARA');

%%% Extracting estimated structural coefficients for use in simulation and
%%% calculation of heterogeneity contributions

theta=[factor_1j; step2j_coeffs];
run comparator_general_data_sig_types;

%%% "est_load" (factor loadings), "est_sd", and "est_sd" cont allow the
%%% replication of Table A2. The signal to noise ratio=loading^2*sd^2;
%%% where the loading is proper to a measure and the sd is proper to a
%%% factor. For categorical measures, the standard deviation of the error
%%% term is assumed to be 0, so no further modification is needed. For the
%%% continuous numeracy measure in cognitive ability, the standard
%%% deviation is estimated which meeans one needs to further  
%%% divide by est_sd^2.

%%% "est_av_sim_prefs" give parameter values for the average
%%% person (Table 3 of paper)
[coeffs, fval, est_av_sim_prefs] = ml_risk_time_score_types_cdf([factor_1j; step2j_coeffs]);                   

est_inv_types=est_types';
est_pres_types=[est_inv_types(:,1) est_inv_types(:,3:5) est_inv_types(:,2)];
est_prefs=[est_pres_types; [est_thetas est_sig_ths est_rs est_sig_rs est_kappas] ];

%%%obtains mean factor values for Table 6 of the paper
mean_factors=est_f_mean+est_alpha'*mean(X_s)';
%%%obtains estimates of the standard deviation on the orthogonal part of
%%%factors for Table 6 of the paper 
est_sd
%%%obtains structural coefficients on the deterministic part on factors for
%%%Table 6 of the paper 
est_alpha_tr=est_alpha';

%%% Output estimated structural coefficient for simulation
savefile = 'est_coeff_export.mat';
save(savefile,'thresh', 'factor_n', 'c_char_n', 'b_char_n', 'X_n', 'm_char_n', 'cont_char_n', 'n_type', 'ind_n', 'est_alpha', 'X_s', 'X_ind_s', 'f_ijs', 'est_load', 'est_sd', 'est_f_mean', 'est_sd_cont', 'est_thetas', 'est_kappas', 'est_sig_ths', 'est_rs', 'est_sig_rs', 'est_p_types', 'est_types', 'est_prefs', 'r_xy_table', 'sig_th_scale', 'theta_xy', 'r_xy_table', 'r_scale', 'n_theta_para', 'theta_i_cat_thresh', 'CRRA2', 'K_time', 'sig_r_sep', 'hyper', 'CARA');
%%% for K_time, add the var at the end
%save(savefile,'thresh', 'factor_n', 'c_char_n', 'b_char_n', 'X_n', 'm_char_n', 'cont_char_n', 'n_type', 'ind_n', 'est_alpha', 'X_s', 'X_ind_s', 'f_ijs', 'est_load', 'est_sd', 'est_f_mean', 'est_sd_cont', 'est_thetas', 'est_kappas', 'est_sig_ths', 'est_rs', 'est_sig_rs', 'est_p_types', 'est_types', 'est_prefs', 'r_xy_table', 'sig_th_scale', 'theta_xy', 'r_xy_table', 'est_K_time');



%%% clustered robust standard errors for the structural coefficients
%%% related to risk and time preferences and consistency parameters are
%%% calculated in a separate file
run clustered_robust_sd_calc


%%% record standard errors
se_prefs=[se_vector_n_risk_robust_cluster; se_vector_n_time_robust_cluster];
se_prefs_inv=[se_prefs(1:5) se_prefs(6:10) se_prefs(11:15) se_prefs(16:20) se_prefs(21:25)];

%%% replicates Appendix Table A4 and robustness in Table A5 
est_prefs_excel=[ est_pres_types; est_prefs(6,:); se_prefs_inv(1,:); est_prefs(7,:); se_prefs_inv(2,:); est_prefs(8,:); se_prefs_inv(3,:); est_prefs(9,:); se_prefs_inv(4,:); est_prefs(10,:); se_prefs_inv(5,:) ];



